import os
import re

import requests
from bs4 import BeautifulSoup


# Shared HTTP headers for every request: a desktop-Chrome User-Agent so the
# site serves its normal markup instead of blocking the default
# python-requests agent. 'Authority' mirrors the HTTP/2 :authority
# pseudo-header — presumably expected by this site; harmless if ignored.
headers = {
    'Authority': 'www.ayosemarang.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}
# Step 1: collect article links from the homepage
def get_article_links():
    """Fetch the site homepage and return a list of article URL strings.

    An article link is any <a href> matching
    ``https://www.ayosemarang.com/<digits>/<digits>/<digits>/<slug>``.

    Returns:
        list[str]: matched href values, in document order (may be empty).
    """
    url = 'https://www.ayosemarang.com'
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Dots are escaped so they match literal '.' rather than any character
    # (the original pattern could match e.g. 'wwwXayosemarangYcom').
    pattern = re.compile(r'https://www\.ayosemarang\.com/\d+/\d+/\d+/[\w-]+')
    return [link['href'] for link in soup.find_all('a', href=pattern)]

# Step 2: download each article's title and body text
def get_article_content(article_links):
    """Download each linked article and extract its title and body.

    Args:
        article_links: iterable of article URL strings.

    Returns:
        list[tuple[str, str]]: one ``(title, content)`` pair per page that
        contains both an ``h1.entry-title`` and a ``div.entry-content``.
        Pages missing either element are skipped instead of raising
        ``AttributeError`` (``soup.find`` returns ``None`` on a miss).
    """
    articles = []

    for link in article_links:
        response = requests.get(link, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')

        title_tag = soup.find('h1', class_='entry-title')
        content_tag = soup.find('div', class_='entry-content')
        # Skip pages that don't follow the expected article layout.
        if title_tag is None or content_tag is None:
            continue

        articles.append((title_tag.text.strip(), content_tag.text.strip()))

    return articles

# Step 3: write each article out to its own text file
def write_articles_to_files(articles, output_dir='F:/tmp/1/2'):
    """Write each ``(title, content)`` pair to ``<output_dir>/<title>.txt``.

    The title is sanitized before being used as a filename: characters
    illegal in Windows filenames (``\\ / : * ? " < > |``) are replaced
    with ``_``.  Previously a title containing any of those characters
    made ``open()`` raise ``OSError``.  The file's first line is the
    original (unsanitized) title, followed by the content.

    Args:
        articles: iterable of ``(title, content)`` string pairs.
        output_dir: destination directory, created if missing.  Defaults
            to the original hard-coded path for backward compatibility.
    """
    os.makedirs(output_dir, exist_ok=True)
    for title, content in articles:
        # Fall back to 'untitled' if sanitization leaves an empty name.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', title).strip() or 'untitled'
        path = os.path.join(output_dir, f'{safe_name}.txt')
        with open(path, 'w', encoding='utf-8') as file:
            file.write(title)
            file.write('\n')
            file.write(content)


if __name__ == '__main__':
    # To scrape every article on the front page, swap in the crawler:
    # article_links = get_article_links()
    links = {"https://www.ayosemarang.com/semarang-raya/7710736055/8-guru-kota-semarang-catat-prestasi-pemkot-berikan-apresiasi-khusus"}
    write_articles_to_files(get_article_content(links))
